4b6018a8f59c85fc29c336292edb3274f3aa04ca,be.iminds.iot.dianne.rl.experience/test/be/iminds/iot/dianne/rl/experience/ExperiencePoolTest.java,ExperiencePoolTest,testExperiencePoolCycle,#,168
Before Change
// Excerpt from the cycling test: finishes building sequence2, then repeatedly adds
// sequences and checks the pool's sample count (size) and sequence count after each add.
// NOTE(review): the pool's capacity and eviction policy are set up outside this excerpt;
// the expected values below suggest older sequences are dropped to make room - confirm.
sequence2.add(new ExperiencePoolSample(s3, a3, 0, s4));
sequence2.add(new ExperiencePoolSample(s4, a4, 0, s5));
// the last sample of the sequence is built with a null next state
sequence2.add(new ExperiencePoolSample(s5, a5, 0, null));
pool.addSequence(sequence2);
// with sequence2 added the pool is expected to hold 10 samples in 2 sequences
Assert.assertEquals(10, pool.size());
Assert.assertEquals(2, pool.sequences());
// third sequence (3 samples) - adding it should make the pool cycle
List<ExperiencePoolSample> sequence3 = new ArrayList<>();
sequence3.add(new ExperiencePoolSample(s0, a0, 1, s1));
sequence3.add(new ExperiencePoolSample(s1, a1, 0, s2));
sequence3.add(new ExperiencePoolSample(s2, a2, 0, null));
pool.addSequence(sequence3);
// expected size goes from 10 to 9 even though 3 samples were added,
// and the sequence count stays at 2
Assert.assertEquals(9, pool.size());
Assert.assertEquals(2, pool.sequences());
// index 0 should now be the first sample of sequence2
// (the start of sequence2 is declared above this excerpt)
ExperiencePoolSample s = pool.getSample(0);
Assert.assertEquals(s1, s.getState());
Assert.assertEquals(a0, s.getAction());
// NOTE(review): two-argument assertEquals on a float literal - which overload is picked
// depends on the Assert class imported by the file; confirm no delta argument is needed
Assert.assertEquals(0.0f, s.getScalarReward());
Assert.assertEquals(s1, s.getNextState());
Assert.assertEquals(false, s.isTerminal());
// index 6 should be the first sample of sequence3: (s0, a0, reward 1, s1)
s = pool.getSample(6);
Assert.assertEquals(s0, s.getState());
Assert.assertEquals(a0, s.getAction());
Assert.assertEquals(1.0f, s.getScalarReward());
Assert.assertEquals(s1, s.getNextState());
Assert.assertEquals(false, s.isTerminal());
// add sequence3 a second time: 9 + 3 samples, expected to fit as a third sequence
pool.addSequence(sequence3);
Assert.assertEquals(12, pool.size());
Assert.assertEquals(3, pool.sequences());
// and cycle some more: re-adding sequence2 is expected to keep size 12 and 3 sequences
pool.addSequence(sequence2);
Assert.assertEquals(12, pool.size());
Assert.assertEquals(3, pool.sequences());
// one more sequence2: size is expected to stay at 12 while the sequence count drops to 2
pool.addSequence(sequence2);
Assert.assertEquals(12, pool.size());
Assert.assertEquals(2, pool.sequences());
After Change
// Excerpt from the cycling test after the API change: lists of samples are wrapped in
// a Sequence before being handed to pool.addSequence, and the pool's sample count (size)
// and sequence count are checked after each add.
// NOTE(review): the pool's capacity and eviction policy are set up outside this excerpt;
// the expected values below suggest older sequences are dropped to make room - confirm.
// one sequence is expected in the pool at this point (it is added above this excerpt)
Assert.assertEquals(1, pool.sequences());
// second sequence (6 samples)
List<ExperiencePoolSample> sequence2 = new ArrayList<>();
// use s1 here as start so we can check whether we correctly cycled
// this sample should be sample 0 after cycling
sequence2.add(new ExperiencePoolSample(s1, a0, 0, s1));
sequence2.add(new ExperiencePoolSample(s1, a1, 0, s2));
sequence2.add(new ExperiencePoolSample(s2, a2, 0, s3));
sequence2.add(new ExperiencePoolSample(s3, a3, 0, s4));
sequence2.add(new ExperiencePoolSample(s4, a4, 0, s5));
// the last sample of the sequence is built with a null next state
sequence2.add(new ExperiencePoolSample(s5, a5, 0, null));
// the second constructor argument equals the number of samples in the list
// (presumably the sequence length - confirm against the Sequence class)
pool.addSequence(new Sequence<ExperiencePoolSample>(sequence2, 6));
// with sequence2 added the pool is expected to hold 10 samples in 2 sequences
Assert.assertEquals(10, pool.size());
Assert.assertEquals(2, pool.sequences());
// third sequence (3 samples) - adding it should make the pool cycle
List<ExperiencePoolSample> sequence3 = new ArrayList<>();
sequence3.add(new ExperiencePoolSample(s0, a0, 1, s1));
sequence3.add(new ExperiencePoolSample(s1, a1, 0, s2));
sequence3.add(new ExperiencePoolSample(s2, a2, 0, null));
pool.addSequence(new Sequence<ExperiencePoolSample>(sequence3, 3));
// expected size goes from 10 to 9 even though 3 samples were added,
// and the sequence count stays at 2
Assert.assertEquals(9, pool.size());
Assert.assertEquals(2, pool.sequences());
// index 0 should now be the first sample of sequence2: (s1, a0, reward 0, s1)
ExperiencePoolSample s = pool.getSample(0);
Assert.assertEquals(s1, s.getState());
Assert.assertEquals(a0, s.getAction());
// NOTE(review): two-argument assertEquals on a float literal - which overload is picked
// depends on the Assert class imported by the file; confirm no delta argument is needed
Assert.assertEquals(0.0f, s.getScalarReward());
Assert.assertEquals(s1, s.getNextState());
Assert.assertEquals(false, s.isTerminal());
// index 6 should be the first sample of sequence3: (s0, a0, reward 1, s1)
s = pool.getSample(6);
Assert.assertEquals(s0, s.getState());
Assert.assertEquals(a0, s.getAction());
Assert.assertEquals(1.0f, s.getScalarReward());
Assert.assertEquals(s1, s.getNextState());
Assert.assertEquals(false, s.isTerminal());
// add sequence3 a second time: 9 + 3 samples, expected to fit as a third sequence
pool.addSequence(new Sequence<ExperiencePoolSample>(sequence3, 3));
Assert.assertEquals(12, pool.size());
Assert.assertEquals(3, pool.sequences());
// and cycle some more: re-adding the 6-sample sequence2 is expected to keep
// the size at 12 and the sequence count at 3
pool.addSequence(new Sequence<ExperiencePoolSample>(sequence2, 6));
Assert.assertEquals(12, pool.size());
Assert.assertEquals(3, pool.sequences());